import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Visualization Library
import seaborn as sns
# Load the ad-click dataset into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Click.csv")
df.head(n=5)
| | Daily Time Spent on Site | Age | Area Income | Daily Internet Usage | Ad Topic Line | City | Gender | Country | Timestamp | Clicked on Ad |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 62.26 | 32.0 | 69481.85 | 172.83 | Decentralized real-time circuit | Lisafort | Male | Svalbard & Jan Mayen Islands | 2016-06-09 21:43:05 | 0 |
| 1 | 41.73 | 31.0 | 61840.26 | 207.17 | Optional full-range projection | West Angelabury | Male | Singapore | 2016-01-16 17:56:05 | 0 |
| 2 | 44.40 | 30.0 | 57877.15 | 172.83 | Total 5thgeneration standardization | Reyesfurt | Female | Guadeloupe | 2016-06-29 10:50:45 | 0 |
| 3 | 59.88 | 28.0 | 56180.93 | 207.17 | Balanced empowering success | New Michael | Female | Zambia | 2016-06-21 14:32:32 | 0 |
| 4 | 49.21 | 30.0 | 54324.73 | 201.58 | Total 5thgeneration standardization | West Richard | Female | Qatar | 2016-07-21 10:54:35 | 1 |
# Preview the last five rows of the dataset.
df.tail(n=5)
| | Daily Time Spent on Site | Age | Area Income | Daily Internet Usage | Ad Topic Line | City | Gender | Country | Timestamp | Clicked on Ad |
|---|---|---|---|---|---|---|---|---|---|---|
| 9995 | 41.73 | 31.0 | 61840.26 | 207.17 | Profound executive flexibility | West Angelabury | Male | Singapore | 2016-01-03 03:22:15 | 1 |
| 9996 | 41.73 | 28.0 | 51501.38 | 120.49 | Managed zero tolerance concept | Kennedyfurt | Male | Luxembourg | 2016-05-28 12:20:15 | 0 |
| 9997 | 55.60 | 39.0 | 38067.08 | 124.44 | Intuitive exuding service-desk | North Randy | Female | Egypt | 2016-01-05 11:53:17 | 0 |
| 9998 | 46.61 | 50.0 | 43974.49 | 123.13 | Realigned content-based leverage | North Samantha | Female | Malawi | 2016-04-04 07:07:46 | 1 |
| 9999 | 46.61 | 43.0 | 60575.99 | 198.45 | Optimized upward-trending productivity | Port Jeffrey | Male | Northern Mariana Islands | 2016-04-03 21:13:46 | 1 |
# info is a method: it must be called to print the per-column dtype and
# non-null summary. Without the parentheses the notebook only displays the
# bound method's repr (a dump of the whole frame).
df.info()
<bound method DataFrame.info of Daily Time Spent on Site Age Area Income Daily Internet Usage \
0 62.26 32.0 69481.85 172.83
1 41.73 31.0 61840.26 207.17
2 44.40 30.0 57877.15 172.83
3 59.88 28.0 56180.93 207.17
4 49.21 30.0 54324.73 201.58
... ... ... ... ...
9995 41.73 31.0 61840.26 207.17
9996 41.73 28.0 51501.38 120.49
9997 55.60 39.0 38067.08 124.44
9998 46.61 50.0 43974.49 123.13
9999 46.61 43.0 60575.99 198.45
Ad Topic Line City Gender \
0 Decentralized real-time circuit Lisafort Male
1 Optional full-range projection West Angelabury Male
2 Total 5thgeneration standardization Reyesfurt Female
3 Balanced empowering success New Michael Female
4 Total 5thgeneration standardization West Richard Female
... ... ... ...
9995 Profound executive flexibility West Angelabury Male
9996 Managed zero tolerance concept Kennedyfurt Male
9997 Intuitive exuding service-desk North Randy Female
9998 Realigned content-based leverage North Samantha Female
9999 Optimized upward-trending productivity Port Jeffrey Male
Country Timestamp Clicked on Ad
0 Svalbard & Jan Mayen Islands 2016-06-09 21:43:05 0
1 Singapore 2016-01-16 17:56:05 0
2 Guadeloupe 2016-06-29 10:50:45 0
3 Zambia 2016-06-21 14:32:32 0
4 Qatar 2016-07-21 10:54:35 1
... ... ... ...
9995 Singapore 2016-01-03 03:22:15 1
9996 Luxembourg 2016-05-28 12:20:15 0
9997 Egypt 2016-01-05 11:53:17 0
9998 Malawi 2016-04-04 07:07:46 1
9999 Northern Mariana Islands 2016-04-03 21:13:46 1
[10000 rows x 10 columns]>
# describe is a method: it must be called to get the summary statistics
# (count, mean, std, min, quartiles, max) of the numeric columns. Without the
# parentheses the notebook only displays the bound method's repr.
df.describe()
<bound method NDFrame.describe of Daily Time Spent on Site Age Area Income Daily Internet Usage \
0 62.26 32.0 69481.85 172.83
1 41.73 31.0 61840.26 207.17
2 44.40 30.0 57877.15 172.83
3 59.88 28.0 56180.93 207.17
4 49.21 30.0 54324.73 201.58
... ... ... ... ...
9995 41.73 31.0 61840.26 207.17
9996 41.73 28.0 51501.38 120.49
9997 55.60 39.0 38067.08 124.44
9998 46.61 50.0 43974.49 123.13
9999 46.61 43.0 60575.99 198.45
Ad Topic Line City Gender \
0 Decentralized real-time circuit Lisafort Male
1 Optional full-range projection West Angelabury Male
2 Total 5thgeneration standardization Reyesfurt Female
3 Balanced empowering success New Michael Female
4 Total 5thgeneration standardization West Richard Female
... ... ... ...
9995 Profound executive flexibility West Angelabury Male
9996 Managed zero tolerance concept Kennedyfurt Male
9997 Intuitive exuding service-desk North Randy Female
9998 Realigned content-based leverage North Samantha Female
9999 Optimized upward-trending productivity Port Jeffrey Male
Country Timestamp Clicked on Ad
0 Svalbard & Jan Mayen Islands 2016-06-09 21:43:05 0
1 Singapore 2016-01-16 17:56:05 0
2 Guadeloupe 2016-06-29 10:50:45 0
3 Zambia 2016-06-21 14:32:32 0
4 Qatar 2016-07-21 10:54:35 1
... ... ... ...
9995 Singapore 2016-01-03 03:22:15 1
9996 Luxembourg 2016-05-28 12:20:15 0
9997 Egypt 2016-01-05 11:53:17 0
9998 Malawi 2016-04-04 07:07:46 1
9999 Northern Mariana Islands 2016-04-03 21:13:46 1
[10000 rows x 10 columns]>
# List the column labels of the dataset.
df.columns
Index(['Daily Time Spent on Site', 'Age', 'Area Income',
'Daily Internet Usage', 'Ad Topic Line', 'City', 'Gender', 'Country',
'Timestamp', 'Clicked on Ad'],
dtype='object')
# Dimensions of the dataset as (number of rows, number of columns).
df.shape
(10000, 10)
# Total number of cells in the dataset (rows * columns).
df.size
100000
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Show the name of the plotly template currently used as the default.
pio.templates.default
'plotly'
# Turn the 0/1 target into readable "No"/"Yes" labels for the plots below.
# replace() is used instead of map() so the cell is safe to re-run: map()
# converts every value that is not a dict key to NaN, which would wipe the
# column on a second execution, whereas replace() leaves already-labelled
# values untouched.
df['Clicked on Ad'] = df['Clicked on Ad'].replace({0: "No", 1: "Yes"})
# Box plot of daily time spent on site, one box per click outcome.
# The title previously read "based time spent on site"; the missing "on" is
# restored so it matches the wording of the other plot titles.
fig = px.box(
    df,
    x="Daily Time Spent on Site",
    color="Clicked on Ad",
    title="Click through rate based on time spent on site",
    color_discrete_map={'Yes': 'Blue',
                        'No': 'Red'},
)
# Compute the quartiles with plotly's "exclusive" method.
fig.update_traces(quartilemethod="exclusive")
fig.show()
# Box plot of daily internet usage, one box per click outcome.
# update_traces returns the figure it was called on, so it can be chained.
fig = px.box(
    df,
    x="Daily Internet Usage",
    color="Clicked on Ad",
    title="Click through rate based on Daily Internet Usage",
    color_discrete_map=dict(Yes='Blue', No='Red'),
).update_traces(quartilemethod="exclusive")
fig.show()
# Box plot of visitor age, one box per click outcome.
# update_traces returns the figure it was called on, so it can be chained.
fig = px.box(
    df,
    x="Age",
    color="Clicked on Ad",
    title="Click through rate based on Age",
    color_discrete_map=dict(Yes='Blue', No='Red'),
).update_traces(quartilemethod="exclusive")
fig.show()
# Box plot of area income, one box per click outcome.
# update_traces returns the figure it was called on, so it can be chained.
fig = px.box(
    df,
    x="Area Income",
    color="Clicked on Ad",
    title="Click Through Rate based on Income",
    color_discrete_map=dict(Yes='Blue', No='Red'),
).update_traces(quartilemethod="exclusive")
fig.show()
# Count how many rows fall into each "Clicked on Ad" category.
df['Clicked on Ad'].value_counts()
Clicked on Ad
No     5083
Yes    4917
Name: count, dtype: int64
# Overall click-through rate in percent: the share of rows labelled "Yes".
# Derived from the data rather than from counts copied by hand out of the
# value_counts() output, so it stays correct if the dataset changes.
click_through_rate = (df['Clicked on Ad'] == 'Yes').mean() * 100
print(click_through_rate)
49.17